/* armv8-32-sha256-asm
 *
 * Copyright (C) 2006-2023 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

/* Generated using (from wolfssl):
 *   cd ../scripts
 *   ruby ./sha2/sha256.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.c
 */

#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
#include <stdint.h>
#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE

#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)

#ifdef __IAR_SYSTEMS_ICC__
#define __asm__        asm
#define __volatile__   volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__        __asm
#define __volatile__   volatile
#endif /* __KEIL__ */
#ifndef NO_SHA256
#include <wolfssl/wolfcrypt/sha256.h>

#ifdef WOLFSSL_ARMASM_NO_NEON
/* SHA-256 round constants K[0..63] as listed in FIPS 180-4, section 4.2.2.
 *
 * Stored in round order, four 32-bit words per row. The transform in this
 * file takes the address of this table and loads one entry per round.
 */
static const uint32_t L_SHA256_transform_len_k[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* K[ 0.. 3] */
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, /* K[ 4.. 7] */
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* K[ 8..11] */
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, /* K[12..15] */
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* K[16..19] */
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, /* K[20..23] */
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* K[24..27] */
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, /* K[28..31] */
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* K[32..35] */
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, /* K[36..39] */
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* K[40..43] */
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, /* K[44..47] */
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* K[48..51] */
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, /* K[52..55] */
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* K[56..59] */
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, /* K[60..63] */
};

/* SHA-256 block transform for ARM32 written as inline assembly (this variant
 * is compiled when WOLFSSL_ARMASM_NO_NEON is defined).
 *
 * sha256  State object. Its first eight 32-bit words (byte offsets 0..28)
 *         are loaded as the digest, and the same offsets are rewritten as
 *         the rounds execute.
 * data    Input buffer. Each pass of the block loop reads 64 bytes from it
 *         as sixteen 32-bit words and byte-reverses every word before use.
 * len     NOTE(review): presumably the number of bytes available at data,
 *         made up of whole 64-byte blocks - confirm against the loop tail
 *         of the definition.
 */
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
    register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
    register const byte* data asm ("r1") = (const byte*)data_p;
    register word32 len asm ("r2") = (word32)len_p;
    register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;

    __asm__ __volatile__ (
        "sub	sp, sp, #0xc0\n\t"
        /* Copy digest to add in at end */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r4, [%[sha256]]\n\t"
        "ldr	r5, [%[sha256], #4]\n\t"
#else
        "ldrd	r4, r5, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
#else
        "ldrd	r6, r7, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r8, [%[sha256], #16]\n\t"
        "ldr	r9, [%[sha256], #20]\n\t"
#else
        "ldrd	r8, r9, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r10, [%[sha256], #24]\n\t"
        "ldr	r11, [%[sha256], #28]\n\t"
#else
        "ldrd	r10, r11, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp, #64]\n\t"
        "str	r5, [sp, #68]\n\t"
#else
        "strd	r4, r5, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #72]\n\t"
        "str	r7, [sp, #76]\n\t"
#else
        "strd	r6, r7, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r8, [sp, #80]\n\t"
        "str	r9, [sp, #84]\n\t"
#else
        "strd	r8, r9, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r10, [sp, #88]\n\t"
        "str	r11, [sp, #92]\n\t"
#else
        "strd	r10, r11, [sp, #88]\n\t"
#endif
        /* Start of loop processing a block */
        "\n"
    "L_SHA256_transform_len_begin_%=: \n\t"
        /* Load, Reverse and Store W - 64 bytes */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
        "ldr	r4, [%[data]]\n\t"
        "ldr	r5, [%[data], #4]\n\t"
        "ldr	r6, [%[data], #8]\n\t"
        "ldr	r7, [%[data], #12]\n\t"
        "eor	r8, r4, r4, ror #16\n\t"
        "eor	r9, r5, r5, ror #16\n\t"
        "eor	r10, r6, r6, ror #16\n\t"
        "eor	r11, r7, r7, ror #16\n\t"
        "bic	r8, r8, #0xff0000\n\t"
        "bic	r9, r9, #0xff0000\n\t"
        "bic	r10, r10, #0xff0000\n\t"
        "bic	r11, r11, #0xff0000\n\t"
        "ror	r4, r4, #8\n\t"
        "ror	r5, r5, #8\n\t"
        "ror	r6, r6, #8\n\t"
        "ror	r7, r7, #8\n\t"
        "eor	r4, r4, r8, lsr #8\n\t"
        "eor	r5, r5, r9, lsr #8\n\t"
        "eor	r6, r6, r10, lsr #8\n\t"
        "eor	r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp]\n\t"
        "str	r5, [sp, #4]\n\t"
#else
        "strd	r4, r5, [sp]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #8]\n\t"
        "str	r7, [sp, #12]\n\t"
#else
        "strd	r6, r7, [sp, #8]\n\t"
#endif
        "ldr	r4, [%[data], #16]\n\t"
        "ldr	r5, [%[data], #20]\n\t"
        "ldr	r6, [%[data], #24]\n\t"
        "ldr	r7, [%[data], #28]\n\t"
        "eor	r8, r4, r4, ror #16\n\t"
        "eor	r9, r5, r5, ror #16\n\t"
        "eor	r10, r6, r6, ror #16\n\t"
        "eor	r11, r7, r7, ror #16\n\t"
        "bic	r8, r8, #0xff0000\n\t"
        "bic	r9, r9, #0xff0000\n\t"
        "bic	r10, r10, #0xff0000\n\t"
        "bic	r11, r11, #0xff0000\n\t"
        "ror	r4, r4, #8\n\t"
        "ror	r5, r5, #8\n\t"
        "ror	r6, r6, #8\n\t"
        "ror	r7, r7, #8\n\t"
        "eor	r4, r4, r8, lsr #8\n\t"
        "eor	r5, r5, r9, lsr #8\n\t"
        "eor	r6, r6, r10, lsr #8\n\t"
        "eor	r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp, #16]\n\t"
        "str	r5, [sp, #20]\n\t"
#else
        "strd	r4, r5, [sp, #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #24]\n\t"
        "str	r7, [sp, #28]\n\t"
#else
        "strd	r6, r7, [sp, #24]\n\t"
#endif
        "ldr	r4, [%[data], #32]\n\t"
        "ldr	r5, [%[data], #36]\n\t"
        "ldr	r6, [%[data], #40]\n\t"
        "ldr	r7, [%[data], #44]\n\t"
        "eor	r8, r4, r4, ror #16\n\t"
        "eor	r9, r5, r5, ror #16\n\t"
        "eor	r10, r6, r6, ror #16\n\t"
        "eor	r11, r7, r7, ror #16\n\t"
        "bic	r8, r8, #0xff0000\n\t"
        "bic	r9, r9, #0xff0000\n\t"
        "bic	r10, r10, #0xff0000\n\t"
        "bic	r11, r11, #0xff0000\n\t"
        "ror	r4, r4, #8\n\t"
        "ror	r5, r5, #8\n\t"
        "ror	r6, r6, #8\n\t"
        "ror	r7, r7, #8\n\t"
        "eor	r4, r4, r8, lsr #8\n\t"
        "eor	r5, r5, r9, lsr #8\n\t"
        "eor	r6, r6, r10, lsr #8\n\t"
        "eor	r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp, #32]\n\t"
        "str	r5, [sp, #36]\n\t"
#else
        "strd	r4, r5, [sp, #32]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #40]\n\t"
        "str	r7, [sp, #44]\n\t"
#else
        "strd	r6, r7, [sp, #40]\n\t"
#endif
        "ldr	r4, [%[data], #48]\n\t"
        "ldr	r5, [%[data], #52]\n\t"
        "ldr	r6, [%[data], #56]\n\t"
        "ldr	r7, [%[data], #60]\n\t"
        "eor	r8, r4, r4, ror #16\n\t"
        "eor	r9, r5, r5, ror #16\n\t"
        "eor	r10, r6, r6, ror #16\n\t"
        "eor	r11, r7, r7, ror #16\n\t"
        "bic	r8, r8, #0xff0000\n\t"
        "bic	r9, r9, #0xff0000\n\t"
        "bic	r10, r10, #0xff0000\n\t"
        "bic	r11, r11, #0xff0000\n\t"
        "ror	r4, r4, #8\n\t"
        "ror	r5, r5, #8\n\t"
        "ror	r6, r6, #8\n\t"
        "ror	r7, r7, #8\n\t"
        "eor	r4, r4, r8, lsr #8\n\t"
        "eor	r5, r5, r9, lsr #8\n\t"
        "eor	r6, r6, r10, lsr #8\n\t"
        "eor	r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp, #48]\n\t"
        "str	r5, [sp, #52]\n\t"
#else
        "strd	r4, r5, [sp, #48]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #56]\n\t"
        "str	r7, [sp, #60]\n\t"
#else
        "strd	r6, r7, [sp, #56]\n\t"
#endif
#else
        "ldr	r4, [%[data]]\n\t"
        "ldr	r5, [%[data], #4]\n\t"
        "ldr	r6, [%[data], #8]\n\t"
        "ldr	r7, [%[data], #12]\n\t"
        "ldr	r8, [%[data], #16]\n\t"
        "ldr	r9, [%[data], #20]\n\t"
        "ldr	r10, [%[data], #24]\n\t"
        "ldr	r11, [%[data], #28]\n\t"
        "rev	r4, r4\n\t"
        "rev	r5, r5\n\t"
        "rev	r6, r6\n\t"
        "rev	r7, r7\n\t"
        "rev	r8, r8\n\t"
        "rev	r9, r9\n\t"
        "rev	r10, r10\n\t"
        "rev	r11, r11\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp]\n\t"
        "str	r5, [sp, #4]\n\t"
#else
        "strd	r4, r5, [sp]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #8]\n\t"
        "str	r7, [sp, #12]\n\t"
#else
        "strd	r6, r7, [sp, #8]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r8, [sp, #16]\n\t"
        "str	r9, [sp, #20]\n\t"
#else
        "strd	r8, r9, [sp, #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r10, [sp, #24]\n\t"
        "str	r11, [sp, #28]\n\t"
#else
        "strd	r10, r11, [sp, #24]\n\t"
#endif
        "ldr	r4, [%[data], #32]\n\t"
        "ldr	r5, [%[data], #36]\n\t"
        "ldr	r6, [%[data], #40]\n\t"
        "ldr	r7, [%[data], #44]\n\t"
        "ldr	r8, [%[data], #48]\n\t"
        "ldr	r9, [%[data], #52]\n\t"
        "ldr	r10, [%[data], #56]\n\t"
        "ldr	r11, [%[data], #60]\n\t"
        "rev	r4, r4\n\t"
        "rev	r5, r5\n\t"
        "rev	r6, r6\n\t"
        "rev	r7, r7\n\t"
        "rev	r8, r8\n\t"
        "rev	r9, r9\n\t"
        "rev	r10, r10\n\t"
        "rev	r11, r11\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp, #32]\n\t"
        "str	r5, [sp, #36]\n\t"
#else
        "strd	r4, r5, [sp, #32]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #40]\n\t"
        "str	r7, [sp, #44]\n\t"
#else
        "strd	r6, r7, [sp, #40]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r8, [sp, #48]\n\t"
        "str	r9, [sp, #52]\n\t"
#else
        "strd	r8, r9, [sp, #48]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r10, [sp, #56]\n\t"
        "str	r11, [sp, #60]\n\t"
#else
        "strd	r10, r11, [sp, #56]\n\t"
#endif
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
        "ldr	r11, [%[sha256], #4]\n\t"
        "ldr	r4, [%[sha256], #8]\n\t"
        "eor	r11, r11, r4\n\t"
        "mov	r12, #3\n\t"
        /* Start of 16 rounds */
        "\n"
    "L_SHA256_transform_len_start_%=: \n\t"
        /* Round 0 */
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r9, [%[sha256], #28]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp]\n\t"
        "ldr	r6, [r3]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r8, [%[sha256], #12]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #12]\n\t"
        "str	r9, [%[sha256], #28]\n\t"
        /* Calc new W[0] */
        "ldr	r6, [sp, #56]\n\t"
        "ldr	r7, [sp, #36]\n\t"
        "ldr	r8, [sp, #4]\n\t"
        "ldr	r9, [sp]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp]\n\t"
        /* Round 1 */
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r9, [%[sha256], #24]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #4]\n\t"
        "ldr	r6, [r3, #4]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r8, [%[sha256], #8]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #8]\n\t"
        "str	r9, [%[sha256], #24]\n\t"
        /* Calc new W[1] */
        "ldr	r6, [sp, #60]\n\t"
        "ldr	r7, [sp, #40]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r9, [sp, #4]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #4]\n\t"
        /* Round 2 */
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r9, [%[sha256], #20]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #8]\n\t"
        "ldr	r6, [r3, #8]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r8, [%[sha256], #4]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #4]\n\t"
        "str	r9, [%[sha256], #20]\n\t"
        /* Calc new W[2] */
        "ldr	r6, [sp]\n\t"
        "ldr	r7, [sp, #44]\n\t"
        "ldr	r8, [sp, #12]\n\t"
        "ldr	r9, [sp, #8]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #8]\n\t"
        /* Round 3 */
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r9, [%[sha256], #16]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #12]\n\t"
        "ldr	r6, [r3, #12]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r8, [%[sha256]]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256]]\n\t"
        "str	r9, [%[sha256], #16]\n\t"
        /* Calc new W[3] */
        "ldr	r6, [sp, #4]\n\t"
        "ldr	r7, [sp, #48]\n\t"
        "ldr	r8, [sp, #16]\n\t"
        "ldr	r9, [sp, #12]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #12]\n\t"
        /* Round 4 */
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r9, [%[sha256], #12]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #16]\n\t"
        "ldr	r6, [r3, #16]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r8, [%[sha256], #28]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #28]\n\t"
        "str	r9, [%[sha256], #12]\n\t"
        /* Calc new W[4] */
        "ldr	r6, [sp, #8]\n\t"
        "ldr	r7, [sp, #52]\n\t"
        "ldr	r8, [sp, #20]\n\t"
        "ldr	r9, [sp, #16]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #16]\n\t"
        /* Round 5 */
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r9, [%[sha256], #8]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #20]\n\t"
        "ldr	r6, [r3, #20]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r8, [%[sha256], #24]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #24]\n\t"
        "str	r9, [%[sha256], #8]\n\t"
        /* Calc new W[5] */
        "ldr	r6, [sp, #12]\n\t"
        "ldr	r7, [sp, #56]\n\t"
        "ldr	r8, [sp, #24]\n\t"
        "ldr	r9, [sp, #20]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #20]\n\t"
        /* Round 6 */
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r9, [%[sha256], #4]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #24]\n\t"
        "ldr	r6, [r3, #24]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r8, [%[sha256], #20]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #20]\n\t"
        "str	r9, [%[sha256], #4]\n\t"
        /* Calc new W[6] */
        "ldr	r6, [sp, #16]\n\t"
        "ldr	r7, [sp, #60]\n\t"
        "ldr	r8, [sp, #28]\n\t"
        "ldr	r9, [sp, #24]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #24]\n\t"
        /* Round 7 */
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r9, [%[sha256]]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #28]\n\t"
        "ldr	r6, [r3, #28]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r8, [%[sha256], #16]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #16]\n\t"
        "str	r9, [%[sha256]]\n\t"
        /* Calc new W[7] */
        "ldr	r6, [sp, #20]\n\t"
        "ldr	r7, [sp]\n\t"
        "ldr	r8, [sp, #32]\n\t"
        "ldr	r9, [sp, #28]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #28]\n\t"
        /* Round 8 */
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r9, [%[sha256], #28]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #32]\n\t"
        "ldr	r6, [r3, #32]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r8, [%[sha256], #12]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #12]\n\t"
        "str	r9, [%[sha256], #28]\n\t"
        /* Calc new W[8] */
        "ldr	r6, [sp, #24]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #36]\n\t"
        "ldr	r9, [sp, #32]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #32]\n\t"
        /* Round 9 */
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r9, [%[sha256], #24]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #36]\n\t"
        "ldr	r6, [r3, #36]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r8, [%[sha256], #8]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #8]\n\t"
        "str	r9, [%[sha256], #24]\n\t"
        /* Calc new W[9] */
        "ldr	r6, [sp, #28]\n\t"
        "ldr	r7, [sp, #8]\n\t"
        "ldr	r8, [sp, #40]\n\t"
        "ldr	r9, [sp, #36]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #36]\n\t"
        /* Round 10 */
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r9, [%[sha256], #20]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #40]\n\t"
        "ldr	r6, [r3, #40]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r8, [%[sha256], #4]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #4]\n\t"
        "str	r9, [%[sha256], #20]\n\t"
        /* Calc new W[10] */
        "ldr	r6, [sp, #32]\n\t"
        "ldr	r7, [sp, #12]\n\t"
        "ldr	r8, [sp, #44]\n\t"
        "ldr	r9, [sp, #40]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #40]\n\t"
        /* Round 11 */
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r9, [%[sha256], #16]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #44]\n\t"
        "ldr	r6, [r3, #44]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r8, [%[sha256]]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256]]\n\t"
        "str	r9, [%[sha256], #16]\n\t"
        /* Calc new W[11] */
        "ldr	r6, [sp, #36]\n\t"
        "ldr	r7, [sp, #16]\n\t"
        "ldr	r8, [sp, #48]\n\t"
        "ldr	r9, [sp, #44]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #44]\n\t"
        /* Round 12 */
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r9, [%[sha256], #12]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #48]\n\t"
        "ldr	r6, [r3, #48]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r8, [%[sha256], #28]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #28]\n\t"
        "str	r9, [%[sha256], #12]\n\t"
        /* Calc new W[12] */
        "ldr	r6, [sp, #40]\n\t"
        "ldr	r7, [sp, #20]\n\t"
        "ldr	r8, [sp, #52]\n\t"
        "ldr	r9, [sp, #48]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #48]\n\t"
        /* Round 13 */
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r9, [%[sha256], #8]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #52]\n\t"
        "ldr	r6, [r3, #52]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r8, [%[sha256], #24]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #24]\n\t"
        "str	r9, [%[sha256], #8]\n\t"
        /* Calc new W[13] */
        "ldr	r6, [sp, #44]\n\t"
        "ldr	r7, [sp, #24]\n\t"
        "ldr	r8, [sp, #56]\n\t"
        "ldr	r9, [sp, #52]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #52]\n\t"
        /* Round 14 */
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r9, [%[sha256], #4]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #56]\n\t"
        "ldr	r6, [r3, #56]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r8, [%[sha256], #20]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #20]\n\t"
        "str	r9, [%[sha256], #4]\n\t"
        /* Calc new W[14] */
        "ldr	r6, [sp, #48]\n\t"
        "ldr	r7, [sp, #28]\n\t"
        "ldr	r8, [sp, #60]\n\t"
        "ldr	r9, [sp, #56]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #56]\n\t"
        /* Round 15 */
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r9, [%[sha256]]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #60]\n\t"
        "ldr	r6, [r3, #60]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r8, [%[sha256], #16]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #16]\n\t"
        "str	r9, [%[sha256]]\n\t"
        /* Calc new W[15] */
        "ldr	r6, [sp, #52]\n\t"
        "ldr	r7, [sp, #32]\n\t"
        "ldr	r8, [sp]\n\t"
        "ldr	r9, [sp, #60]\n\t"
        "ror	r4, r6, #17\n\t"
        "ror	r5, r8, #7\n\t"
        "eor	r4, r4, r6, ror #19\n\t"
        "eor	r5, r5, r8, ror #18\n\t"
        "eor	r4, r4, r6, lsr #10\n\t"
        "eor	r5, r5, r8, lsr #3\n\t"
        "add	r9, r9, r7\n\t"
        "add	r4, r4, r5\n\t"
        "add	r9, r9, r4\n\t"
        "str	r9, [sp, #60]\n\t"
        "add	r3, r3, #0x40\n\t"
        "subs	r12, r12, #1\n\t"
        "bne	L_SHA256_transform_len_start_%=\n\t"
        /* Round 0 */
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r9, [%[sha256], #28]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp]\n\t"
        "ldr	r6, [r3]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r8, [%[sha256], #12]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #12]\n\t"
        "str	r9, [%[sha256], #28]\n\t"
        /* Round 1 */
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r9, [%[sha256], #24]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #4]\n\t"
        "ldr	r6, [r3, #4]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r8, [%[sha256], #8]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #8]\n\t"
        "str	r9, [%[sha256], #24]\n\t"
        /* Round 2 */
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r9, [%[sha256], #20]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #8]\n\t"
        "ldr	r6, [r3, #8]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r8, [%[sha256], #4]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #4]\n\t"
        "str	r9, [%[sha256], #20]\n\t"
        /* Round 3 */
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r9, [%[sha256], #16]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #12]\n\t"
        "ldr	r6, [r3, #12]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r8, [%[sha256]]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256]]\n\t"
        "str	r9, [%[sha256], #16]\n\t"
        /* Round 4 */
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r9, [%[sha256], #12]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #16]\n\t"
        "ldr	r6, [r3, #16]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r8, [%[sha256], #28]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #28]\n\t"
        "str	r9, [%[sha256], #12]\n\t"
        /* Round 5 */
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r9, [%[sha256], #8]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #20]\n\t"
        "ldr	r6, [r3, #20]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r8, [%[sha256], #24]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #24]\n\t"
        "str	r9, [%[sha256], #8]\n\t"
        /* Round 6 */
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r9, [%[sha256], #4]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #24]\n\t"
        "ldr	r6, [r3, #24]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r8, [%[sha256], #20]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #20]\n\t"
        "str	r9, [%[sha256], #4]\n\t"
        /* Round 7 */
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r9, [%[sha256]]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #28]\n\t"
        "ldr	r6, [r3, #28]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r8, [%[sha256], #16]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #16]\n\t"
        "str	r9, [%[sha256]]\n\t"
        /* Round 8 */
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r9, [%[sha256], #28]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #32]\n\t"
        "ldr	r6, [r3, #32]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r8, [%[sha256], #12]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #12]\n\t"
        "str	r9, [%[sha256], #28]\n\t"
        /* Round 9 */
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r9, [%[sha256], #24]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #36]\n\t"
        "ldr	r6, [r3, #36]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r8, [%[sha256], #8]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #8]\n\t"
        "str	r9, [%[sha256], #24]\n\t"
        /* Round 10 */
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r9, [%[sha256], #20]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #40]\n\t"
        "ldr	r6, [r3, #40]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r8, [%[sha256], #4]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #4]\n\t"
        "str	r9, [%[sha256], #20]\n\t"
        /* Round 11 */
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r9, [%[sha256], #16]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #44]\n\t"
        "ldr	r6, [r3, #44]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r8, [%[sha256]]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256]]\n\t"
        "str	r9, [%[sha256], #16]\n\t"
        /* Round 12 */
        "ldr	r5, [%[sha256]]\n\t"
        "ldr	r6, [%[sha256], #4]\n\t"
        "ldr	r7, [%[sha256], #8]\n\t"
        "ldr	r9, [%[sha256], #12]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #48]\n\t"
        "ldr	r6, [r3, #48]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #16]\n\t"
        "ldr	r6, [%[sha256], #20]\n\t"
        "ldr	r7, [%[sha256], #24]\n\t"
        "ldr	r8, [%[sha256], #28]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #28]\n\t"
        "str	r9, [%[sha256], #12]\n\t"
        /* Round 13 */
        "ldr	r5, [%[sha256], #28]\n\t"
        "ldr	r6, [%[sha256]]\n\t"
        "ldr	r7, [%[sha256], #4]\n\t"
        "ldr	r9, [%[sha256], #8]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #52]\n\t"
        "ldr	r6, [r3, #52]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #12]\n\t"
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
        "ldr	r8, [%[sha256], #24]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #24]\n\t"
        "str	r9, [%[sha256], #8]\n\t"
        /* Round 14 */
        "ldr	r5, [%[sha256], #24]\n\t"
        "ldr	r6, [%[sha256], #28]\n\t"
        "ldr	r7, [%[sha256]]\n\t"
        "ldr	r9, [%[sha256], #4]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #56]\n\t"
        "ldr	r6, [r3, #56]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #8]\n\t"
        "ldr	r6, [%[sha256], #12]\n\t"
        "ldr	r7, [%[sha256], #16]\n\t"
        "ldr	r8, [%[sha256], #20]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r10, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r11, r11, r10\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r11, r11, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r11\n\t"
        "str	r8, [%[sha256], #20]\n\t"
        "str	r9, [%[sha256], #4]\n\t"
        /* Round 15 */
        "ldr	r5, [%[sha256], #20]\n\t"
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
        "ldr	r9, [%[sha256]]\n\t"
        "ror	r4, r5, #6\n\t"
        "eor	r6, r6, r7\n\t"
        "eor	r4, r4, r5, ror #11\n\t"
        "and	r6, r6, r5\n\t"
        "eor	r4, r4, r5, ror #25\n\t"
        "eor	r6, r6, r7\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [sp, #60]\n\t"
        "ldr	r6, [r3, #60]\n\t"
        "add	r9, r9, r5\n\t"
        "add	r9, r9, r6\n\t"
        "ldr	r5, [%[sha256], #4]\n\t"
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
        "ldr	r8, [%[sha256], #16]\n\t"
        "ror	r4, r5, #2\n\t"
        "eor	r11, r5, r6\n\t"
        "eor	r4, r4, r5, ror #13\n\t"
        "and	r10, r10, r11\n\t"
        "eor	r4, r4, r5, ror #22\n\t"
        "eor	r10, r10, r6\n\t"
        "add	r8, r8, r9\n\t"
        "add	r9, r9, r4\n\t"
        "add	r9, r9, r10\n\t"
        "str	r8, [%[sha256], #16]\n\t"
        "str	r9, [%[sha256]]\n\t"
        /* Add in digest from start */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r4, [%[sha256]]\n\t"
        "ldr	r5, [%[sha256], #4]\n\t"
#else
        "ldrd	r4, r5, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r6, [%[sha256], #8]\n\t"
        "ldr	r7, [%[sha256], #12]\n\t"
#else
        "ldrd	r6, r7, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r8, [sp, #64]\n\t"
        "ldr	r9, [sp, #68]\n\t"
#else
        "ldrd	r8, r9, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r10, [sp, #72]\n\t"
        "ldr	r11, [sp, #76]\n\t"
#else
        "ldrd	r10, r11, [sp, #72]\n\t"
#endif
        "add	r4, r4, r8\n\t"
        "add	r5, r5, r9\n\t"
        "add	r6, r6, r10\n\t"
        "add	r7, r7, r11\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [%[sha256]]\n\t"
        "str	r5, [%[sha256], #4]\n\t"
#else
        "strd	r4, r5, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [%[sha256], #8]\n\t"
        "str	r7, [%[sha256], #12]\n\t"
#else
        "strd	r6, r7, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp, #64]\n\t"
        "str	r5, [sp, #68]\n\t"
#else
        "strd	r4, r5, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #72]\n\t"
        "str	r7, [sp, #76]\n\t"
#else
        "strd	r6, r7, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r4, [%[sha256], #16]\n\t"
        "ldr	r5, [%[sha256], #20]\n\t"
#else
        "ldrd	r4, r5, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r6, [%[sha256], #24]\n\t"
        "ldr	r7, [%[sha256], #28]\n\t"
#else
        "ldrd	r6, r7, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r8, [sp, #80]\n\t"
        "ldr	r9, [sp, #84]\n\t"
#else
        "ldrd	r8, r9, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r10, [sp, #88]\n\t"
        "ldr	r11, [sp, #92]\n\t"
#else
        "ldrd	r10, r11, [sp, #88]\n\t"
#endif
        "add	r4, r4, r8\n\t"
        "add	r5, r5, r9\n\t"
        "add	r6, r6, r10\n\t"
        "add	r7, r7, r11\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [%[sha256], #16]\n\t"
        "str	r5, [%[sha256], #20]\n\t"
#else
        "strd	r4, r5, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [%[sha256], #24]\n\t"
        "str	r7, [%[sha256], #28]\n\t"
#else
        "strd	r6, r7, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [sp, #80]\n\t"
        "str	r5, [sp, #84]\n\t"
#else
        "strd	r4, r5, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [sp, #88]\n\t"
        "str	r7, [sp, #92]\n\t"
#else
        "strd	r6, r7, [sp, #88]\n\t"
#endif
        "subs	%[len], %[len], #0x40\n\t"
        "sub	r3, r3, #0xc0\n\t"
        "add	%[data], %[data], #0x40\n\t"
        "bne	L_SHA256_transform_len_begin_%=\n\t"
        "add	sp, sp, #0xc0\n\t"
        : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

#endif /* WOLFSSL_ARMASM_NO_NEON */
#include <wolfssl/wolfcrypt/sha256.h>

#ifndef WOLFSSL_ARMASM_NO_NEON
/* SHA-256 round constants K[0..63] as listed in FIPS 180-4, section 4.2.2
 * (first 32 bits of the fractional parts of the cube roots of the first
 * 64 prime numbers).
 *
 * The NEON Transform_Sha256_Len in this file receives the address of this
 * table in r3, copies it to r12 and reads the constants with word-sized
 * loads relative to r12.
 */
static const uint32_t L_SHA256_transform_neon_len_k[64] = {
    0x428a2f98U, 0x71374491U, 0xb5c0fbcfU, 0xe9b5dba5U, /* K[ 0.. 3] */
    0x3956c25bU, 0x59f111f1U, 0x923f82a4U, 0xab1c5ed5U, /* K[ 4.. 7] */
    0xd807aa98U, 0x12835b01U, 0x243185beU, 0x550c7dc3U, /* K[ 8..11] */
    0x72be5d74U, 0x80deb1feU, 0x9bdc06a7U, 0xc19bf174U, /* K[12..15] */
    0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU, /* K[16..19] */
    0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU, /* K[20..23] */
    0x983e5152U, 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U, /* K[24..27] */
    0xc6e00bf3U, 0xd5a79147U, 0x06ca6351U, 0x14292967U, /* K[28..31] */
    0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, 0x53380d13U, /* K[32..35] */
    0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U, /* K[36..39] */
    0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U, /* K[40..43] */
    0xd192e819U, 0xd6990624U, 0xf40e3585U, 0x106aa070U, /* K[44..47] */
    0x19a4c116U, 0x1e376c08U, 0x2748774cU, 0x34b0bcb5U, /* K[48..51] */
    0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, 0x682e6ff3U, /* K[52..55] */
    0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U, /* K[56..59] */
    0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U  /* K[60..63] */
};

/* Prototype for the NEON build of Transform_Sha256_Len (compiled only when
 * WOLFSSL_ARMASM_NO_NEON is not defined); the definition follows directly.
 *
 * sha256  Hash state. The eight 32-bit words at byte offsets 0..28 are
 *         loaded into registers as the working digest.
 * data    Input bytes. Each block is fetched as 64 bytes (two 32-byte
 *         vld1.8 loads with post-increment) and every 32-bit word is
 *         byte-reversed with vrev32.8 before use.
 * len     Saved on the stack at entry. Presumably the number of input
 *         bytes and a non-zero multiple of 64 -- TODO confirm against the
 *         loop tail of the definition and the callers.
 */
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
    register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
    register const byte* data asm ("r1") = (const byte*)data_p;
    register word32 len asm ("r2") = (word32)len_p;
    register uint32_t* L_SHA256_transform_neon_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_neon_len_k;

    __asm__ __volatile__ (
        "sub	sp, sp, #24\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	%[sha256], [sp]\n\t"
        "str	%[data], [sp, #4]\n\t"
#else
        "strd	%[sha256], %[data], [sp]\n\t"
#endif
        "str	%[len], [sp, #8]\n\t"
        "mov	r12, %[L_SHA256_transform_neon_len_k]\n\t"
        /* Load digest into registers */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	%[len], [%[sha256]]\n\t"
        "ldr	r3, [%[sha256], #4]\n\t"
#else
        "ldrd	%[len], r3, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r4, [%[sha256], #8]\n\t"
        "ldr	r5, [%[sha256], #12]\n\t"
#else
        "ldrd	r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r6, [%[sha256], #16]\n\t"
        "ldr	r7, [%[sha256], #20]\n\t"
#else
        "ldrd	r6, r7, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	r8, [%[sha256], #24]\n\t"
        "ldr	r9, [%[sha256], #28]\n\t"
#else
        "ldrd	r8, r9, [%[sha256], #24]\n\t"
#endif
        /* Start of loop processing a block */
        "\n"
    "L_SHA256_transform_neon_len_begin_%=: \n\t"
        /* Load W */
        "vld1.8	{d0-d3}, [%[data]]!\n\t"
        "vld1.8	{d4-d7}, [%[data]]!\n\t"
#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
        "vrev32.8	q0, q0\n\t"
        "vrev32.8	q1, q1\n\t"
        "vrev32.8	q2, q2\n\t"
        "vrev32.8	q3, q3\n\t"
#else
        "vrev32.8	d0, d0\n\t"
        "vrev32.8	d1, d1\n\t"
        "vrev32.8	d2, d2\n\t"
        "vrev32.8	d3, d3\n\t"
        "vrev32.8	d4, d4\n\t"
        "vrev32.8	d5, d5\n\t"
        "vrev32.8	d6, d6\n\t"
        "vrev32.8	d7, d7\n\t"
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
        "str	%[data], [sp, #4]\n\t"
        "mov	lr, #3\n\t"
        /* Start of 16 rounds */
        "\n"
    "L_SHA256_transform_neon_len_start_%=: \n\t"
        /* Round 0 */
        "vmov.32	r10, d0[0]\n\t"
        "ror	%[sha256], r6, #6\n\t"
        "eor	%[data], r7, r8\n\t"
        "eor	%[sha256], %[sha256], r6, ror #11\n\t"
        "and	%[data], %[data], r6\n\t"
        "eor	%[sha256], %[sha256], r6, ror #25\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        "ldr	%[sha256], [r12]\n\t"
        "add	r9, r9, r10\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r5, r5, r9\n\t"
        "ror	%[sha256], %[len], #2\n\t"
        "eor	%[data], %[len], r3\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #13\n\t"
        "eor	r10, r3, r4\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #22\n\t"
        "eor	%[data], %[data], r3\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        /* Round 1 */
        "vmov.32	r10, d0[1]\n\t"
        /* Calc new W[0]-W[1] */
        "vext.8	d10, d0, d1, #4\n\t"
        "ror	%[sha256], r5, #6\n\t"
        "vshl.u32	d8, d7, #15\n\t"
        "eor	%[data], r6, r7\n\t"
        "vsri.u32	d8, d7, #17\n\t"
        "eor	%[sha256], %[sha256], r5, ror #11\n\t"
        "vshl.u32	d9, d7, #13\n\t"
        "and	%[data], %[data], r5\n\t"
        "vsri.u32	d9, d7, #19\n\t"
        "eor	%[sha256], %[sha256], r5, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r7\n\t"
        "vshr.u32	d8, d7, #10\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	r8, r8, %[data]\n\t"
        "vadd.i32	d0, d9\n\t"
        "ldr	%[sha256], [r12, #4]\n\t"
        "vext.8	d11, d4, d5, #4\n\t"
        "add	r8, r8, r10\n\t"
        "vadd.i32	d0, d11\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	r4, r4, r8\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r9, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r9, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, %[len], r3\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r9, ror #22\n\t"
        "vadd.i32	d0, d9\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r8, r8, %[data]\n\t"
        /* Round 2 */
        "vmov.32	r10, d1[0]\n\t"
        "ror	%[sha256], r4, #6\n\t"
        "eor	%[data], r5, r6\n\t"
        "eor	%[sha256], %[sha256], r4, ror #11\n\t"
        "and	%[data], %[data], r4\n\t"
        "eor	%[sha256], %[sha256], r4, ror #25\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        "ldr	%[sha256], [r12, #8]\n\t"
        "add	r7, r7, r10\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r3, r3, r7\n\t"
        "ror	%[sha256], r8, #2\n\t"
        "eor	%[data], r8, r9\n\t"
        "eor	%[sha256], %[sha256], r8, ror #13\n\t"
        "eor	r10, r9, %[len]\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r8, ror #22\n\t"
        "eor	%[data], %[data], r9\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        /* Round 3 */
        "vmov.32	r10, d1[1]\n\t"
        /* Calc new W[2]-W[3] */
        "vext.8	d10, d1, d2, #4\n\t"
        "ror	%[sha256], r3, #6\n\t"
        "vshl.u32	d8, d0, #15\n\t"
        "eor	%[data], r4, r5\n\t"
        "vsri.u32	d8, d0, #17\n\t"
        "eor	%[sha256], %[sha256], r3, ror #11\n\t"
        "vshl.u32	d9, d0, #13\n\t"
        "and	%[data], %[data], r3\n\t"
        "vsri.u32	d9, d0, #19\n\t"
        "eor	%[sha256], %[sha256], r3, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r5\n\t"
        "vshr.u32	d8, d0, #10\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	r6, r6, %[data]\n\t"
        "vadd.i32	d1, d9\n\t"
        "ldr	%[sha256], [r12, #12]\n\t"
        "vext.8	d11, d5, d6, #4\n\t"
        "add	r6, r6, r10\n\t"
        "vadd.i32	d1, d11\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	%[len], %[len], r6\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r7, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r7, r8\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r7, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, r8, r9\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r7, ror #22\n\t"
        "vadd.i32	d1, d9\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	r6, r6, %[data]\n\t"
        /* Round 4 */
        "vmov.32	r10, d2[0]\n\t"
        "ror	%[sha256], %[len], #6\n\t"
        "eor	%[data], r3, r4\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #11\n\t"
        "and	%[data], %[data], %[len]\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #25\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        "ldr	%[sha256], [r12, #16]\n\t"
        "add	r5, r5, r10\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r9, r9, r5\n\t"
        "ror	%[sha256], r6, #2\n\t"
        "eor	%[data], r6, r7\n\t"
        "eor	%[sha256], %[sha256], r6, ror #13\n\t"
        "eor	r10, r7, r8\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r6, ror #22\n\t"
        "eor	%[data], %[data], r7\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        /* Round 5 */
        "vmov.32	r10, d2[1]\n\t"
        /* Calc new W[4]-W[5] */
        "vext.8	d10, d2, d3, #4\n\t"
        "ror	%[sha256], r9, #6\n\t"
        "vshl.u32	d8, d1, #15\n\t"
        "eor	%[data], %[len], r3\n\t"
        "vsri.u32	d8, d1, #17\n\t"
        "eor	%[sha256], %[sha256], r9, ror #11\n\t"
        "vshl.u32	d9, d1, #13\n\t"
        "and	%[data], %[data], r9\n\t"
        "vsri.u32	d9, d1, #19\n\t"
        "eor	%[sha256], %[sha256], r9, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r3\n\t"
        "vshr.u32	d8, d1, #10\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	r4, r4, %[data]\n\t"
        "vadd.i32	d2, d9\n\t"
        "ldr	%[sha256], [r12, #20]\n\t"
        "vext.8	d11, d6, d7, #4\n\t"
        "add	r4, r4, r10\n\t"
        "vadd.i32	d2, d11\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	r8, r8, r4\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r5, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r5, r6\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r5, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, r6, r7\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r5, ror #22\n\t"
        "vadd.i32	d2, d9\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r4, r4, %[data]\n\t"
        /* Round 6 */
        "vmov.32	r10, d3[0]\n\t"
        "ror	%[sha256], r8, #6\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "eor	%[sha256], %[sha256], r8, ror #11\n\t"
        "and	%[data], %[data], r8\n\t"
        "eor	%[sha256], %[sha256], r8, ror #25\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        "ldr	%[sha256], [r12, #24]\n\t"
        "add	r3, r3, r10\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r7, r7, r3\n\t"
        "ror	%[sha256], r4, #2\n\t"
        "eor	%[data], r4, r5\n\t"
        "eor	%[sha256], %[sha256], r4, ror #13\n\t"
        "eor	r10, r5, r6\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r4, ror #22\n\t"
        "eor	%[data], %[data], r5\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        /* Round 7 */
        "vmov.32	r10, d3[1]\n\t"
        /* Calc new W[6]-W[7] */
        "vext.8	d10, d3, d4, #4\n\t"
        "ror	%[sha256], r7, #6\n\t"
        "vshl.u32	d8, d2, #15\n\t"
        "eor	%[data], r8, r9\n\t"
        "vsri.u32	d8, d2, #17\n\t"
        "eor	%[sha256], %[sha256], r7, ror #11\n\t"
        "vshl.u32	d9, d2, #13\n\t"
        "and	%[data], %[data], r7\n\t"
        "vsri.u32	d9, d2, #19\n\t"
        "eor	%[sha256], %[sha256], r7, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r9\n\t"
        "vshr.u32	d8, d2, #10\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	%[len], %[len], %[data]\n\t"
        "vadd.i32	d3, d9\n\t"
        "ldr	%[sha256], [r12, #28]\n\t"
        "vext.8	d11, d7, d0, #4\n\t"
        "add	%[len], %[len], r10\n\t"
        "vadd.i32	d3, d11\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	r6, r6, %[len]\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r3, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r3, r4\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r3, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, r4, r5\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r3, ror #22\n\t"
        "vadd.i32	d3, d9\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	%[len], %[len], %[data]\n\t"
        /* Round 8 */
        "vmov.32	r10, d4[0]\n\t"
        "ror	%[sha256], r6, #6\n\t"
        "eor	%[data], r7, r8\n\t"
        "eor	%[sha256], %[sha256], r6, ror #11\n\t"
        "and	%[data], %[data], r6\n\t"
        "eor	%[sha256], %[sha256], r6, ror #25\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        "ldr	%[sha256], [r12, #32]\n\t"
        "add	r9, r9, r10\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r5, r5, r9\n\t"
        "ror	%[sha256], %[len], #2\n\t"
        "eor	%[data], %[len], r3\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #13\n\t"
        "eor	r10, r3, r4\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #22\n\t"
        "eor	%[data], %[data], r3\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        /* Round 9 */
        "vmov.32	r10, d4[1]\n\t"
        /* Calc new W[8]-W[9] */
        "vext.8	d10, d4, d5, #4\n\t"
        "ror	%[sha256], r5, #6\n\t"
        "vshl.u32	d8, d3, #15\n\t"
        "eor	%[data], r6, r7\n\t"
        "vsri.u32	d8, d3, #17\n\t"
        "eor	%[sha256], %[sha256], r5, ror #11\n\t"
        "vshl.u32	d9, d3, #13\n\t"
        "and	%[data], %[data], r5\n\t"
        "vsri.u32	d9, d3, #19\n\t"
        "eor	%[sha256], %[sha256], r5, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r7\n\t"
        "vshr.u32	d8, d3, #10\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	r8, r8, %[data]\n\t"
        "vadd.i32	d4, d9\n\t"
        "ldr	%[sha256], [r12, #36]\n\t"
        "vext.8	d11, d0, d1, #4\n\t"
        "add	r8, r8, r10\n\t"
        "vadd.i32	d4, d11\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	r4, r4, r8\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r9, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r9, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, %[len], r3\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r9, ror #22\n\t"
        "vadd.i32	d4, d9\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r8, r8, %[data]\n\t"
        /* Round 10 */
        "vmov.32	r10, d5[0]\n\t"
        "ror	%[sha256], r4, #6\n\t"
        "eor	%[data], r5, r6\n\t"
        "eor	%[sha256], %[sha256], r4, ror #11\n\t"
        "and	%[data], %[data], r4\n\t"
        "eor	%[sha256], %[sha256], r4, ror #25\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        "ldr	%[sha256], [r12, #40]\n\t"
        "add	r7, r7, r10\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r3, r3, r7\n\t"
        "ror	%[sha256], r8, #2\n\t"
        "eor	%[data], r8, r9\n\t"
        "eor	%[sha256], %[sha256], r8, ror #13\n\t"
        "eor	r10, r9, %[len]\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r8, ror #22\n\t"
        "eor	%[data], %[data], r9\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        /* Round 11 */
        "vmov.32	r10, d5[1]\n\t"
        /* Calc new W[10]-W[11] */
        "vext.8	d10, d5, d6, #4\n\t"
        "ror	%[sha256], r3, #6\n\t"
        "vshl.u32	d8, d4, #15\n\t"
        "eor	%[data], r4, r5\n\t"
        "vsri.u32	d8, d4, #17\n\t"
        "eor	%[sha256], %[sha256], r3, ror #11\n\t"
        "vshl.u32	d9, d4, #13\n\t"
        "and	%[data], %[data], r3\n\t"
        "vsri.u32	d9, d4, #19\n\t"
        "eor	%[sha256], %[sha256], r3, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r5\n\t"
        "vshr.u32	d8, d4, #10\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	r6, r6, %[data]\n\t"
        "vadd.i32	d5, d9\n\t"
        "ldr	%[sha256], [r12, #44]\n\t"
        "vext.8	d11, d1, d2, #4\n\t"
        "add	r6, r6, r10\n\t"
        "vadd.i32	d5, d11\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	%[len], %[len], r6\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r7, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r7, r8\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r7, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, r8, r9\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r7, ror #22\n\t"
        "vadd.i32	d5, d9\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	r6, r6, %[data]\n\t"
        /* Round 12 */
        "vmov.32	r10, d6[0]\n\t"
        "ror	%[sha256], %[len], #6\n\t"
        "eor	%[data], r3, r4\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #11\n\t"
        "and	%[data], %[data], %[len]\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #25\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        "ldr	%[sha256], [r12, #48]\n\t"
        "add	r5, r5, r10\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r9, r9, r5\n\t"
        "ror	%[sha256], r6, #2\n\t"
        "eor	%[data], r6, r7\n\t"
        "eor	%[sha256], %[sha256], r6, ror #13\n\t"
        "eor	r10, r7, r8\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r6, ror #22\n\t"
        "eor	%[data], %[data], r7\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        /* Round 13 */
        "vmov.32	r10, d6[1]\n\t"
        /* Calc new W[12]-W[13] */
        "vext.8	d10, d6, d7, #4\n\t"
        "ror	%[sha256], r9, #6\n\t"
        "vshl.u32	d8, d5, #15\n\t"
        "eor	%[data], %[len], r3\n\t"
        "vsri.u32	d8, d5, #17\n\t"
        "eor	%[sha256], %[sha256], r9, ror #11\n\t"
        "vshl.u32	d9, d5, #13\n\t"
        "and	%[data], %[data], r9\n\t"
        "vsri.u32	d9, d5, #19\n\t"
        "eor	%[sha256], %[sha256], r9, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r3\n\t"
        "vshr.u32	d8, d5, #10\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	r4, r4, %[data]\n\t"
        "vadd.i32	d6, d9\n\t"
        "ldr	%[sha256], [r12, #52]\n\t"
        "vext.8	d11, d2, d3, #4\n\t"
        "add	r4, r4, r10\n\t"
        "vadd.i32	d6, d11\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	r8, r8, r4\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r5, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r5, r6\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r5, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, r6, r7\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r5, ror #22\n\t"
        "vadd.i32	d6, d9\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r4, r4, %[data]\n\t"
        /* Round 14 */
        "vmov.32	r10, d7[0]\n\t"
        "ror	%[sha256], r8, #6\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "eor	%[sha256], %[sha256], r8, ror #11\n\t"
        "and	%[data], %[data], r8\n\t"
        "eor	%[sha256], %[sha256], r8, ror #25\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        "ldr	%[sha256], [r12, #56]\n\t"
        "add	r3, r3, r10\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r7, r7, r3\n\t"
        "ror	%[sha256], r4, #2\n\t"
        "eor	%[data], r4, r5\n\t"
        "eor	%[sha256], %[sha256], r4, ror #13\n\t"
        "eor	r10, r5, r6\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r4, ror #22\n\t"
        "eor	%[data], %[data], r5\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        /* Round 15 */
        "vmov.32	r10, d7[1]\n\t"
        /* Calc new W[14]-W[15] */
        "vext.8	d10, d7, d0, #4\n\t"
        "ror	%[sha256], r7, #6\n\t"
        "vshl.u32	d8, d6, #15\n\t"
        "eor	%[data], r8, r9\n\t"
        "vsri.u32	d8, d6, #17\n\t"
        "eor	%[sha256], %[sha256], r7, ror #11\n\t"
        "vshl.u32	d9, d6, #13\n\t"
        "and	%[data], %[data], r7\n\t"
        "vsri.u32	d9, d6, #19\n\t"
        "eor	%[sha256], %[sha256], r7, ror #25\n\t"
        "veor	d9, d8\n\t"
        "eor	%[data], %[data], r9\n\t"
        "vshr.u32	d8, d6, #10\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "veor	d9, d8\n\t"
        "add	%[len], %[len], %[data]\n\t"
        "vadd.i32	d7, d9\n\t"
        "ldr	%[sha256], [r12, #60]\n\t"
        "vext.8	d11, d3, d4, #4\n\t"
        "add	%[len], %[len], r10\n\t"
        "vadd.i32	d7, d11\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "vshl.u32	d8, d10, #25\n\t"
        "add	r6, r6, %[len]\n\t"
        "vsri.u32	d8, d10, #7\n\t"
        "ror	%[sha256], r3, #2\n\t"
        "vshl.u32	d9, d10, #14\n\t"
        "eor	%[data], r3, r4\n\t"
        "vsri.u32	d9, d10, #18\n\t"
        "eor	%[sha256], %[sha256], r3, ror #13\n\t"
        "veor	d9, d8\n\t"
        "eor	r10, r4, r5\n\t"
        "vshr.u32	d10, #3\n\t"
        "and	%[data], %[data], r10\n\t"
        "veor	d9, d10\n\t"
        "eor	%[sha256], %[sha256], r3, ror #22\n\t"
        "vadd.i32	d7, d9\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	%[len], %[len], %[data]\n\t"
        "add	r12, r12, #0x40\n\t"
        "subs	lr, lr, #1\n\t"
        "bne	L_SHA256_transform_neon_len_start_%=\n\t"
        /* Round 0 */
        "vmov.32	r10, d0[0]\n\t"
        "ror	%[sha256], r6, #6\n\t"
        "eor	%[data], r7, r8\n\t"
        "eor	%[sha256], %[sha256], r6, ror #11\n\t"
        "and	%[data], %[data], r6\n\t"
        "eor	%[sha256], %[sha256], r6, ror #25\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        "ldr	%[sha256], [r12]\n\t"
        "add	r9, r9, r10\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r5, r5, r9\n\t"
        "ror	%[sha256], %[len], #2\n\t"
        "eor	%[data], %[len], r3\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #13\n\t"
        "eor	r10, r3, r4\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #22\n\t"
        "eor	%[data], %[data], r3\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        /* Round 1 */
        "vmov.32	r10, d0[1]\n\t"
        "ror	%[sha256], r5, #6\n\t"
        "eor	%[data], r6, r7\n\t"
        "eor	%[sha256], %[sha256], r5, ror #11\n\t"
        "and	%[data], %[data], r5\n\t"
        "eor	%[sha256], %[sha256], r5, ror #25\n\t"
        "eor	%[data], %[data], r7\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r8, r8, %[data]\n\t"
        "ldr	%[sha256], [r12, #4]\n\t"
        "add	r8, r8, r10\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r4, r4, r8\n\t"
        "ror	%[sha256], r9, #2\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "eor	%[sha256], %[sha256], r9, ror #13\n\t"
        "eor	r10, %[len], r3\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r9, ror #22\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r8, r8, %[data]\n\t"
        /* Round 2 */
        "vmov.32	r10, d1[0]\n\t"
        "ror	%[sha256], r4, #6\n\t"
        "eor	%[data], r5, r6\n\t"
        "eor	%[sha256], %[sha256], r4, ror #11\n\t"
        "and	%[data], %[data], r4\n\t"
        "eor	%[sha256], %[sha256], r4, ror #25\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        "ldr	%[sha256], [r12, #8]\n\t"
        "add	r7, r7, r10\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r3, r3, r7\n\t"
        "ror	%[sha256], r8, #2\n\t"
        "eor	%[data], r8, r9\n\t"
        "eor	%[sha256], %[sha256], r8, ror #13\n\t"
        "eor	r10, r9, %[len]\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r8, ror #22\n\t"
        "eor	%[data], %[data], r9\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        /* Round 3 */
        "vmov.32	r10, d1[1]\n\t"
        "ror	%[sha256], r3, #6\n\t"
        "eor	%[data], r4, r5\n\t"
        "eor	%[sha256], %[sha256], r3, ror #11\n\t"
        "and	%[data], %[data], r3\n\t"
        "eor	%[sha256], %[sha256], r3, ror #25\n\t"
        "eor	%[data], %[data], r5\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	r6, r6, %[data]\n\t"
        "ldr	%[sha256], [r12, #12]\n\t"
        "add	r6, r6, r10\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	%[len], %[len], r6\n\t"
        "ror	%[sha256], r7, #2\n\t"
        "eor	%[data], r7, r8\n\t"
        "eor	%[sha256], %[sha256], r7, ror #13\n\t"
        "eor	r10, r8, r9\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r7, ror #22\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	r6, r6, %[data]\n\t"
        /* Round 4 */
        "vmov.32	r10, d2[0]\n\t"
        "ror	%[sha256], %[len], #6\n\t"
        "eor	%[data], r3, r4\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #11\n\t"
        "and	%[data], %[data], %[len]\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #25\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        "ldr	%[sha256], [r12, #16]\n\t"
        "add	r5, r5, r10\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r9, r9, r5\n\t"
        "ror	%[sha256], r6, #2\n\t"
        "eor	%[data], r6, r7\n\t"
        "eor	%[sha256], %[sha256], r6, ror #13\n\t"
        "eor	r10, r7, r8\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r6, ror #22\n\t"
        "eor	%[data], %[data], r7\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        /* Round 5 */
        "vmov.32	r10, d2[1]\n\t"
        "ror	%[sha256], r9, #6\n\t"
        "eor	%[data], %[len], r3\n\t"
        "eor	%[sha256], %[sha256], r9, ror #11\n\t"
        "and	%[data], %[data], r9\n\t"
        "eor	%[sha256], %[sha256], r9, ror #25\n\t"
        "eor	%[data], %[data], r3\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r4, r4, %[data]\n\t"
        "ldr	%[sha256], [r12, #20]\n\t"
        "add	r4, r4, r10\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r8, r8, r4\n\t"
        "ror	%[sha256], r5, #2\n\t"
        "eor	%[data], r5, r6\n\t"
        "eor	%[sha256], %[sha256], r5, ror #13\n\t"
        "eor	r10, r6, r7\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r5, ror #22\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r4, r4, %[data]\n\t"
        /* Round 6 */
        "vmov.32	r10, d3[0]\n\t"
        "ror	%[sha256], r8, #6\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "eor	%[sha256], %[sha256], r8, ror #11\n\t"
        "and	%[data], %[data], r8\n\t"
        "eor	%[sha256], %[sha256], r8, ror #25\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        "ldr	%[sha256], [r12, #24]\n\t"
        "add	r3, r3, r10\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r7, r7, r3\n\t"
        "ror	%[sha256], r4, #2\n\t"
        "eor	%[data], r4, r5\n\t"
        "eor	%[sha256], %[sha256], r4, ror #13\n\t"
        "eor	r10, r5, r6\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r4, ror #22\n\t"
        "eor	%[data], %[data], r5\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        /* Round 7 */
        "vmov.32	r10, d3[1]\n\t"
        "ror	%[sha256], r7, #6\n\t"
        "eor	%[data], r8, r9\n\t"
        "eor	%[sha256], %[sha256], r7, ror #11\n\t"
        "and	%[data], %[data], r7\n\t"
        "eor	%[sha256], %[sha256], r7, ror #25\n\t"
        "eor	%[data], %[data], r9\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	%[len], %[len], %[data]\n\t"
        "ldr	%[sha256], [r12, #28]\n\t"
        "add	%[len], %[len], r10\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	r6, r6, %[len]\n\t"
        "ror	%[sha256], r3, #2\n\t"
        "eor	%[data], r3, r4\n\t"
        "eor	%[sha256], %[sha256], r3, ror #13\n\t"
        "eor	r10, r4, r5\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r3, ror #22\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	%[len], %[len], %[data]\n\t"
        /* Round 8 */
        "vmov.32	r10, d4[0]\n\t"
        "ror	%[sha256], r6, #6\n\t"
        "eor	%[data], r7, r8\n\t"
        "eor	%[sha256], %[sha256], r6, ror #11\n\t"
        "and	%[data], %[data], r6\n\t"
        "eor	%[sha256], %[sha256], r6, ror #25\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        "ldr	%[sha256], [r12, #32]\n\t"
        "add	r9, r9, r10\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r5, r5, r9\n\t"
        "ror	%[sha256], %[len], #2\n\t"
        "eor	%[data], %[len], r3\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #13\n\t"
        "eor	r10, r3, r4\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #22\n\t"
        "eor	%[data], %[data], r3\n\t"
        "add	r9, r9, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
        /* Round 9 */
        "vmov.32	r10, d4[1]\n\t"
        "ror	%[sha256], r5, #6\n\t"
        "eor	%[data], r6, r7\n\t"
        "eor	%[sha256], %[sha256], r5, ror #11\n\t"
        "and	%[data], %[data], r5\n\t"
        "eor	%[sha256], %[sha256], r5, ror #25\n\t"
        "eor	%[data], %[data], r7\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r8, r8, %[data]\n\t"
        "ldr	%[sha256], [r12, #36]\n\t"
        "add	r8, r8, r10\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r4, r4, r8\n\t"
        "ror	%[sha256], r9, #2\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "eor	%[sha256], %[sha256], r9, ror #13\n\t"
        "eor	r10, %[len], r3\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r9, ror #22\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r8, r8, %[sha256]\n\t"
        "add	r8, r8, %[data]\n\t"
        /* Round 10 */
        "vmov.32	r10, d5[0]\n\t"
        "ror	%[sha256], r4, #6\n\t"
        "eor	%[data], r5, r6\n\t"
        "eor	%[sha256], %[sha256], r4, ror #11\n\t"
        "and	%[data], %[data], r4\n\t"
        "eor	%[sha256], %[sha256], r4, ror #25\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        "ldr	%[sha256], [r12, #40]\n\t"
        "add	r7, r7, r10\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r3, r3, r7\n\t"
        "ror	%[sha256], r8, #2\n\t"
        "eor	%[data], r8, r9\n\t"
        "eor	%[sha256], %[sha256], r8, ror #13\n\t"
        "eor	r10, r9, %[len]\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r8, ror #22\n\t"
        "eor	%[data], %[data], r9\n\t"
        "add	r7, r7, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
        /* Round 11 */
        "vmov.32	r10, d5[1]\n\t"
        "ror	%[sha256], r3, #6\n\t"
        "eor	%[data], r4, r5\n\t"
        "eor	%[sha256], %[sha256], r3, ror #11\n\t"
        "and	%[data], %[data], r3\n\t"
        "eor	%[sha256], %[sha256], r3, ror #25\n\t"
        "eor	%[data], %[data], r5\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	r6, r6, %[data]\n\t"
        "ldr	%[sha256], [r12, #44]\n\t"
        "add	r6, r6, r10\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	%[len], %[len], r6\n\t"
        "ror	%[sha256], r7, #2\n\t"
        "eor	%[data], r7, r8\n\t"
        "eor	%[sha256], %[sha256], r7, ror #13\n\t"
        "eor	r10, r8, r9\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r7, ror #22\n\t"
        "eor	%[data], %[data], r8\n\t"
        "add	r6, r6, %[sha256]\n\t"
        "add	r6, r6, %[data]\n\t"
        /* Round 12 */
        "vmov.32	r10, d6[0]\n\t"
        "ror	%[sha256], %[len], #6\n\t"
        "eor	%[data], r3, r4\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #11\n\t"
        "and	%[data], %[data], %[len]\n\t"
        "eor	%[sha256], %[sha256], %[len], ror #25\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        "ldr	%[sha256], [r12, #48]\n\t"
        "add	r5, r5, r10\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r9, r9, r5\n\t"
        "ror	%[sha256], r6, #2\n\t"
        "eor	%[data], r6, r7\n\t"
        "eor	%[sha256], %[sha256], r6, ror #13\n\t"
        "eor	r10, r7, r8\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r6, ror #22\n\t"
        "eor	%[data], %[data], r7\n\t"
        "add	r5, r5, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
        /* Round 13 */
        "vmov.32	r10, d6[1]\n\t"
        "ror	%[sha256], r9, #6\n\t"
        "eor	%[data], %[len], r3\n\t"
        "eor	%[sha256], %[sha256], r9, ror #11\n\t"
        "and	%[data], %[data], r9\n\t"
        "eor	%[sha256], %[sha256], r9, ror #25\n\t"
        "eor	%[data], %[data], r3\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r4, r4, %[data]\n\t"
        "ldr	%[sha256], [r12, #52]\n\t"
        "add	r4, r4, r10\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r8, r8, r4\n\t"
        "ror	%[sha256], r5, #2\n\t"
        "eor	%[data], r5, r6\n\t"
        "eor	%[sha256], %[sha256], r5, ror #13\n\t"
        "eor	r10, r6, r7\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r5, ror #22\n\t"
        "eor	%[data], %[data], r6\n\t"
        "add	r4, r4, %[sha256]\n\t"
        "add	r4, r4, %[data]\n\t"
        /* Round 14 */
        "vmov.32	r10, d7[0]\n\t"
        "ror	%[sha256], r8, #6\n\t"
        "eor	%[data], r9, %[len]\n\t"
        "eor	%[sha256], %[sha256], r8, ror #11\n\t"
        "and	%[data], %[data], r8\n\t"
        "eor	%[sha256], %[sha256], r8, ror #25\n\t"
        "eor	%[data], %[data], %[len]\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        "ldr	%[sha256], [r12, #56]\n\t"
        "add	r3, r3, r10\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r7, r7, r3\n\t"
        "ror	%[sha256], r4, #2\n\t"
        "eor	%[data], r4, r5\n\t"
        "eor	%[sha256], %[sha256], r4, ror #13\n\t"
        "eor	r10, r5, r6\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r4, ror #22\n\t"
        "eor	%[data], %[data], r5\n\t"
        "add	r3, r3, %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
        /* Round 15 */
        "vmov.32	r10, d7[1]\n\t"
        "ror	%[sha256], r7, #6\n\t"
        "eor	%[data], r8, r9\n\t"
        "eor	%[sha256], %[sha256], r7, ror #11\n\t"
        "and	%[data], %[data], r7\n\t"
        "eor	%[sha256], %[sha256], r7, ror #25\n\t"
        "eor	%[data], %[data], r9\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	%[len], %[len], %[data]\n\t"
        "ldr	%[sha256], [r12, #60]\n\t"
        "add	%[len], %[len], r10\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	r6, r6, %[len]\n\t"
        "ror	%[sha256], r3, #2\n\t"
        "eor	%[data], r3, r4\n\t"
        "eor	%[sha256], %[sha256], r3, ror #13\n\t"
        "eor	r10, r4, r5\n\t"
        "and	%[data], %[data], r10\n\t"
        "eor	%[sha256], %[sha256], r3, ror #22\n\t"
        "eor	%[data], %[data], r4\n\t"
        "add	%[len], %[len], %[sha256]\n\t"
        "add	%[len], %[len], %[data]\n\t"
        "ldr	r10, [sp]\n\t"
        /* Add in digest from start */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	%[sha256], [r10]\n\t"
        "ldr	%[data], [r10, #4]\n\t"
#else
        "ldrd	%[sha256], %[data], [r10]\n\t"
#endif
        "add	%[len], %[len], %[sha256]\n\t"
        "add	r3, r3, %[data]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	%[len], [r10]\n\t"
        "str	r3, [r10, #4]\n\t"
#else
        "strd	%[len], r3, [r10]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	%[sha256], [r10, #8]\n\t"
        "ldr	%[data], [r10, #12]\n\t"
#else
        "ldrd	%[sha256], %[data], [r10, #8]\n\t"
#endif
        "add	r4, r4, %[sha256]\n\t"
        "add	r5, r5, %[data]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r4, [r10, #8]\n\t"
        "str	r5, [r10, #12]\n\t"
#else
        "strd	r4, r5, [r10, #8]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	%[sha256], [r10, #16]\n\t"
        "ldr	%[data], [r10, #20]\n\t"
#else
        "ldrd	%[sha256], %[data], [r10, #16]\n\t"
#endif
        "add	r6, r6, %[sha256]\n\t"
        "add	r7, r7, %[data]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r6, [r10, #16]\n\t"
        "str	r7, [r10, #20]\n\t"
#else
        "strd	r6, r7, [r10, #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "ldr	%[sha256], [r10, #24]\n\t"
        "ldr	%[data], [r10, #28]\n\t"
#else
        "ldrd	%[sha256], %[data], [r10, #24]\n\t"
#endif
        "add	r8, r8, %[sha256]\n\t"
        "add	r9, r9, %[data]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        "str	r8, [r10, #24]\n\t"
        "str	r9, [r10, #28]\n\t"
#else
        "strd	r8, r9, [r10, #24]\n\t"
#endif
        "ldr	r10, [sp, #8]\n\t"
        "ldr	%[data], [sp, #4]\n\t"
        "subs	r10, r10, #0x40\n\t"
        "sub	r12, r12, #0xc0\n\t"
        "str	r10, [sp, #8]\n\t"
        "bne	L_SHA256_transform_neon_len_begin_%=\n\t"
        "add	sp, sp, #24\n\t"
        : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "cc"
    );
}

#endif /* !WOLFSSL_ARMASM_NO_NEON */
#endif /* !NO_SHA256 */
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */
#endif /* !__aarch64__ && __arm__ && !__thumb__ */

#endif /* WOLFSSL_ARMASM */
